library(datasauRus)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.3.3 ✔ purrr 0.3.4
## ✔ tibble 3.1.0 ✔ dplyr 1.0.5
## ✔ tidyr 1.1.3 ✔ stringr 1.4.0
## ✔ readr 1.4.0 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Keep only the rows of `datasaurus_dozen` whose `dataset` is "dino", then
# drop the `dataset` column so that only the remaining columns are kept.
dino <- select(
  filter(datasaurus_dozen, dataset == "dino"),
  -dataset
)
dino
## # A tibble: 142 x 2
## x y
## <dbl> <dbl>
## 1 55.4 97.2
## 2 51.5 96.0
## 3 46.2 94.5
## 4 42.8 91.4
## 5 40.8 88.3
## 6 38.7 84.9
## 7 35.6 79.9
## 8 33.1 77.6
## 9 29.0 74.5
## 10 26.2 71.4
## # … with 132 more rows
# Scatter plot of the dino data: x on the horizontal axis, y on the vertical.
ggplot(dino, aes(x = x, y = y)) +
  geom_point()

library(tidyverse)
# Department names and their matching (integer) staff counts, combined into
# a two-column tibble with columns `dept` and `count`.
dept <- c(
  "Physics",
  "Mathematics",
  "Statistics",
  "Computer Science"
)
nstaff <- c(12L, 8L, 20L, 23L)
sci_tbl <- tibble(dept, count = nstaff)
sci_tbl
## # A tibble: 4 x 2
## dept count
## <chr> <int>
## 1 Physics 12
## 2 Mathematics 8
## 3 Statistics 20
## 4 Computer Science 23
# Base-graphics stacked bar chart of the staff counts.
# as.matrix() turns the count vector into a one-column matrix, so barplot()
# draws a single bar with one stacked segment per department.
# The legend argument is spelled out in full (`legend.text`) rather than
# relying on partial argument matching of `legend`.
barplot(
  as.matrix(sci_tbl$count),
  legend.text = sci_tbl$dept
)

# Base-graphics pie chart of the same counts, one labelled slice per
# department.
pie(x = sci_tbl$count, labels = sci_tbl$dept)

library(ggplot2)
# Stacked bar: every row shares the single x value "", segment heights come
# from `count` (identity stat) and the fill colour from `dept`.
ggplot(sci_tbl) +
  geom_bar(
    mapping = aes(x = "", y = count, fill = dept),
    stat = "identity"
  )

# The same stacked bar drawn in polar coordinates with y mapped to the angle.
ggplot(sci_tbl) +
  geom_bar(
    mapping = aes(x = "", y = count, fill = dept),
    stat = "identity"
  ) +
  coord_polar(theta = "y")

# Bar chart assembled from an explicit layer(): geom, stat and position are
# all spelled out instead of using a geom_*() shortcut.
ggplot(sci_tbl, aes(x = dept, y = count)) +
  layer(
    geom = "bar",
    stat = "identity",
    position = "identity"
  )

# Base plot reused below: data and the dept/count mapping only, no layers yet.
p <- ggplot(data = sci_tbl, mapping = aes(x = dept, y = count))
p

# Bars via geom_bar() with the identity stat.
p +
  geom_bar(stat = "identity")

# Bars via geom_col().
p +
  geom_col()

# One point per department.
p +
  geom_point()

# One vertical segment per department, running from y = 0 to its count.
p +
  geom_segment(mapping = aes(xend = dept, y = 0, yend = count))

# Points and segments layered together.
p +
  geom_point() +
  geom_segment(mapping = aes(xend = dept, y = 0, yend = count))

# Expand the summary table to one row per staff member: each department row
# is repeated `count` times.
sci_tbl0 <- sci_tbl %>%
  uncount(weights = count)
sci_tbl0
## # A tibble: 63 x 1
## dept
## <chr>
## 1 Physics
## 2 Physics
## 3 Physics
## 4 Physics
## 5 Physics
## 6 Physics
## 7 Physics
## 8 Physics
## 9 Physics
## 10 Physics
## # … with 53 more rows
# Pre-summarised data: bar heights are taken directly from `count`.
ggplot(data = sci_tbl, mapping = aes(x = dept, y = count)) +
  geom_bar(stat = "identity")

# Row-per-observation data: bar heights are the number of rows per `dept`.
ggplot(data = sci_tbl0, mapping = aes(x = dept)) +
  geom_bar(stat = "count")

# Outline colour mapped to department.
p +
  geom_col(mapping = aes(colour = dept))

# Fill colour mapped to department.
p +
  geom_col(mapping = aes(fill = dept))

# Fill set to one fixed colour (outside aes(), so not mapped to data).
p +
  geom_col(fill = "#756bb1")

# Fill mapped to department, with a fixed black outline.
p +
  geom_col(mapping = aes(fill = dept), colour = "#000000")

# Point size mapped to the staff count.
p +
  geom_point(mapping = aes(size = count))

# Filled columns in polar coordinates, with y mapped to the angle.
p +
  geom_col(mapping = aes(fill = dept)) +
  coord_polar(theta = "y")

# Filled columns with the black-and-white theme.
p +
  geom_col(mapping = aes(fill = dept)) +
  theme_bw()

library(ggthemes)
# Filled columns with the economist theme.
p +
  geom_col(mapping = aes(fill = dept)) +
  theme_economist()

# Filled columns with the x-axis tick labels rotated by 30 degrees.
p +
  geom_col(mapping = aes(fill = dept)) +
  theme(
    axis.text.x = element_text(angle = 30, vjust = 0.1)
  )

# Print the `mpg` tibble used in the faceting examples below. It is not
# defined in this script; it is the example dataset that ships with ggplot2.
mpg
## # A tibble: 234 x 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(l… f 18 29 p comp…
## 2 audi a4 1.8 1999 4 manual… f 21 29 p comp…
## 3 audi a4 2 2008 4 manual… f 20 31 p comp…
## 4 audi a4 2 2008 4 auto(a… f 21 30 p comp…
## 5 audi a4 2.8 1999 6 auto(l… f 16 26 p comp…
## 6 audi a4 2.8 1999 6 manual… f 18 26 p comp…
## 7 audi a4 3.1 2008 6 auto(a… f 18 27 p comp…
## 8 audi a4 quat… 1.8 1999 4 manual… 4 18 26 p comp…
## 9 audi a4 quat… 1.8 1999 4 auto(l… 4 16 25 p comp…
## 10 audi a4 quat… 2 2008 4 manual… 4 20 28 p comp…
## # … with 224 more rows
# Base scatter plot for the faceting examples: displ against cty, with the
# points coloured by drv.
p_mpg <- ggplot(data = mpg, mapping = aes(x = displ, y = cty)) +
  geom_point(mapping = aes(colour = drv))
p_mpg

# One facet row per value of drv.
p_mpg +
  facet_grid(rows = vars(drv))

# Formula shorthand for the call above: facet_grid(drv ~ .)
# One facet column per value of drv.
p_mpg +
  facet_grid(cols = vars(drv))

# Formula shorthand for the call above: facet_grid(~ drv)
# Two-way grid: drv defines the facet rows and cyl the facet columns.
p_mpg +
  facet_grid(
    rows = vars(drv),
    cols = vars(cyl)
  )

# Formula shorthand for the call above: facet_grid(drv ~ cyl)
# One panel per drv/cyl combination, wrapped into three columns.
p_mpg +
  facet_wrap(facets = vars(drv, cyl), ncol = 3)

# Formula shorthand for the call above: facet_wrap(~ drv + cyl, ncol = 3)
# Read the movies JSON from the URL (simplifying nested vectors on the way
# in) and convert the result to a tibble.
movies <- jsonlite::read_json(
  "https://vega.github.io/vega-editor/app/data/movies.json",
  simplifyVector = TRUE
) %>%
  as_tibble()
movies
## # A tibble: 3,201 x 16
## Title US_Gross Worldwide_Gross US_DVD_Sales Production_Budg… Release_Date
## <chr> <int> <dbl> <int> <int> <chr>
## 1 The Land… 146083 146083 NA 8000000 12-Jun-98
## 2 First Lo… 10876 10876 NA 300000 7-Aug-98
## 3 I Marrie… 203134 203134 NA 250000 28-Aug-98
## 4 Let's Ta… 373615 373615 NA 300000 11-Sep-98
## 5 Slam 1009819 1087521 NA 1000000 9-Oct-98
## 6 Mississi… 24551 2624551 NA 1600000 15-Jan-99
## 7 Following 44705 44705 NA 6000 4-Apr-99
## 8 Foolish 6026908 6026908 NA 1600000 9-Apr-99
## 9 Pirates 1641825 6341825 NA 40000000 1-Jul-86
## 10 Duel in … 20400000 20400000 NA 6000000 31-Dec-46
## # … with 3,191 more rows, and 10 more variables: MPAA_Rating <chr>,
## # Running_Time_min <int>, Distributor <chr>, Source <chr>, Major_Genre <chr>,
## # Creative_Type <chr>, Director <chr>, Rotten_Tomatoes_Rating <int>,
## # IMDB_Rating <dbl>, IMDB_Votes <int>
# Column-by-column summary of `movies`: missing counts, completeness, and
# per-type statistics for the character and numeric columns.
skimr::skim(movies)
## Data summary
## | Name                   | movies |
## | Number of rows         | 3201   |
## | Number of columns      | 16     |
## | Column type frequency: |        |
## |   character            | 8      |
## |   numeric              | 8      |
## | Group variables        | None   |
##
## Variable type: character
## | skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
## | Title         |         1 |          1.00 |   1 |  66 |     0 |     3176 |          0 |
## | Release_Date  |         7 |          1.00 |   8 |  11 |     0 |     1603 |          0 |
## | MPAA_Rating   |       605 |          0.81 |   1 |   9 |     0 |        7 |          0 |
## | Distributor   |       232 |          0.93 |   3 |  33 |     0 |      174 |          0 |
## | Source        |       365 |          0.89 |   6 |  29 |     0 |       18 |          0 |
## | Major_Genre   |       275 |          0.91 |   5 |  19 |     0 |       12 |          0 |
## | Creative_Type |       446 |          0.86 |   7 |  23 |     0 |        9 |          0 |
## | Director      |      1331 |          0.58 |   7 |  27 |     0 |      550 |          0 |
##
## Variable type: numeric
## | skim_variable          | n_missing | complete_rate | mean        | sd           | p0       | p25       | p50        | p75        | p100         | hist  |
## | US_Gross               |         7 |          1.00 | 44002085.16 |  62555311.39 |      0.0 | 5493221.2 | 22019465.5 | 56091761.5 |  760167650.0 | ▇▁▁▁▁ |
## | Worldwide_Gross        |         7 |          1.00 | 85343400.14 | 149947342.89 |      0.0 | 8031285.2 | 31168926.5 | 97283797.0 | 2767891499.0 | ▇▁▁▁▁ |
## | US_DVD_Sales           |      2637 |          0.18 | 34901546.82 |  45895121.60 | 618454.0 | 9906210.8 | 20331557.5 | 37794215.8 |  352582053.0 | ▇▁▁▁▁ |
## | Production_Budget      |         1 |          1.00 | 31069171.45 |  35585913.44 |    218.0 | 6575000.0 | 20000000.0 | 42000000.0 |  300000000.0 | ▇▁▁▁▁ |
## | Running_Time_min       |      1992 |          0.38 |      110.19 |        20.17 |     46.0 |      95.0 |      107.0 |      121.0 |        222.0 | ▁▇▃▁▁ |
## | Rotten_Tomatoes_Rating |       880 |          0.73 |       54.34 |        28.08 |      1.0 |      30.0 |       55.0 |       80.0 |        100.0 | ▅▆▆▇▇ |
## | IMDB_Rating            |       213 |          0.93 |        6.28 |         1.25 |      1.4 |       5.6 |        6.4 |        7.2 |          9.2 | ▁▁▅▇▂ |
## | IMDB_Votes             |       213 |          0.93 |    29908.64 |     44937.58 |     18.0 |    4828.5 |    15106.0 |    35810.5 |     519541.0 | ▇▁▁▁▁ |
# IMDB rating against Rotten Tomatoes rating: small semi-transparent points
# with a GAM smoother on top, drawn in a square panel.
ggplot(
  data = movies,
  mapping = aes(x = IMDB_Rating, y = Rotten_Tomatoes_Rating)
) +
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "gam") +
  theme(aspect.ratio = 1)
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 941 rows containing non-finite values (stat_smooth).
## Warning: Removed 941 rows containing missing values (geom_point).

# The same pair of ratings binned into hexagons, drawn in a square panel.
ggplot(
  data = movies,
  mapping = aes(x = IMDB_Rating, y = Rotten_Tomatoes_Rating)
) +
  geom_hex() +
  theme(aspect.ratio = 1)
## Warning: Removed 941 rows containing non-finite values (stat_binhex).

# Horizontal bar chart: number of movies per major genre.
ggplot(data = movies, mapping = aes(y = Major_Genre)) +
  geom_bar()

# Horizontal box plots of IMDB rating, one per major genre.
ggplot(data = movies) +
  geom_boxplot(mapping = aes(x = IMDB_Rating, y = Major_Genre))
## Warning: Removed 213 rows containing non-finite values (stat_boxplot).

# Density curves of IMDB rating in a single panel, filled by major genre.
ggplot(data = movies) +
  geom_density(mapping = aes(x = IMDB_Rating, fill = Major_Genre))
## Warning: Removed 213 rows containing non-finite values (stat_density).

library(ggridges)
# Ridgeline plot: one IMDB-rating density per major genre, stacked along the
# y axis and filled by genre.
ggplot(
  data = movies,
  mapping = aes(x = IMDB_Rating, y = Major_Genre)
) +
  geom_density_ridges(mapping = aes(fill = Major_Genre))
## Picking joint bandwidth of 0.356
## Warning: Removed 213 rows containing non-finite values (stat_density_ridges).

# IMDB-rating densities filled by major genre, one facet panel per genre.
ggplot(data = movies) +
  geom_density(mapping = aes(x = IMDB_Rating, fill = Major_Genre)) +
  facet_wrap(facets = vars(Major_Genre))
## Warning: Removed 213 rows containing non-finite values (stat_density).
